Code
# Load the shared setup script once. Presumably it defines df, df0 and the
# rq*/label objects used below -- TODO confirm against 0_load_data.R.
source("0_load_data.R")
# Function to map variable prefix to study wave
get_study_wave = function(var_name) {
  # The first character of the variable name is the wave code. The result is
  # the matching wave label, named by that code; a code with no entry in the
  # lookup (e.g. "s") gives NA.
  wave_codes = c(
    "a", "b", "c", "d", "e", "g", "h", "i",
    "j", "l", "n", "p", "r", "u", "z"
  )
  wave_labels = c(
    "1st Contact", "2 Year", "3 Year", "4 Year", "In Home", "7 Year",
    "8 Year", "9 Year", "10 Year", "12 Year", "14 Year", "16 Year",
    "18 Year", "21 Year", "26 Year"
  )
  wave_lookup = setNames(wave_labels, wave_codes)
  wave_lookup[substr(var_name, 1, 1)]
}
# Function to clean description labels
#
# x: character vector of raw variable labels.
# Returns a character vector of the same length with the boilerplate
# "see value labels", a trailing value range, and extra whitespace removed,
# and a few abbreviations spelled out.
clean_description = function(x) {
  x %>%
    # Remove "see value labels" first. It usually sits at the very end of a
    # label, so removing it later would leave a preceding range (", 1-11")
    # un-matched by the end-anchored pattern below.
    str_remove(", see value labels") %>%
    str_remove("see value labels") %>%
    # Trim now so that "$" in the next pattern sees the true end of the label
    str_squish() %>%
    # Remove range info at end (e.g., ", 0-22", ", 1-9", ", 4-11")
    str_remove(", \\d+-\\d+$") %>%
    # Spell out abbreviations inline
    str_replace("SOC employment", "Standard Occupational Classification employment") %>%
    str_replace("MHQ\\)", "Mental Health Questionnaire)") %>%
    str_replace("SLQ\\)", "questionnaire)") %>%
    str_replace("G-game", "G-game (general cognitive ability)") %>%
    str_replace_all(" qnr", " questionnaire") %>%
    # Clean up extra whitespace
    str_squish()
}
# ---- Variable summary tables, one per block of variables ----

# Describe one column for the "Range or Level" table column.
#
# values: a single column (vector or factor).
# Returns a length-one character string:
#   - numeric: "min — max", each rounded to 2 decimals (NAs ignored)
#   - factor:  all levels, comma separated, with "*" appended to the first
#              level (presumably marking the reference level -- confirm)
#   - other:   the unique values, comma separated
summarise_range_or_level = function(values) {
  # is.numeric() rather than class(values) == "numeric": the latter is FALSE
  # for integer columns and is not a valid length-one `if` condition for
  # vectors that carry more than one class.
  if (is.numeric(values)) {
    paste0(round(min(values, na.rm = TRUE), 2), " — ", round(max(values, na.rm = TRUE), 2))
  } else if (is.factor(values)) {
    factor_levels = levels(values)
    paste(c(paste0(factor_levels[1], "*"), factor_levels[-1]), collapse = ", ")
  } else {
    paste(unique(values), collapse = ", ")
  }
}

# Build the summary table for one set of variables.
#
# vars:         character vector of column names in `data`.
# short_labels: short display labels, one per variable.
# descriptions: longer descriptions, one per variable.
# data:         data frame the ranges and Ns are computed from (default: df).
# raw_data:     data frame used to flag variables; a variable whose name is
#               not among its columns gets "*" appended to its code
#               (default: df0).
# Returns a data.frame with columns Short.Label, Description, Teds.Code,
# Range.or.Level, N and Study.Wave. data.frame() converts the spaced names to
# dotted ones, and the code further down relies on the dotted names.
build_variable_table = function(vars, short_labels, descriptions,
                                data = df, raw_data = df0) {
  data.frame(
    `Short Label` = short_labels,
    Description = descriptions,
    `Teds Code` = ifelse(vars %in% colnames(raw_data), vars, paste0(vars, "*")),
    `Range or Level` = vapply(
      vars,
      function(var) summarise_range_or_level(data[[var]]),
      character(1)
    ),
    N = vapply(vars, function(var) sum(!is.na(data[[var]])), integer(1)),
    `Study Wave` = vapply(vars, get_study_wave, character(1))
  )
}

# Prepend the grouping label and a running row number to a table.
# `offset` is the number of rows that precede this table in the combined one.
add_row_index = function(tbl, group, offset) {
  cbind(
    row_group = rep(group, nrow(tbl)),
    row_id = offset + seq_len(nrow(tbl)),
    tbl
  )
}

# A) Baseline variables
v_rq1x = build_variable_table(rq1x, rq1x_labels_clean, rq1x_labels)
v_rq1x$Description = str_remove(v_rq1x$Description, "\\(1st.*")
v_rq1x$Description = clean_description(v_rq1x$Description)
# Manual study-wave edits for variables whose name does not start with a wave
# code. Matching is done on the variable name rather than on Teds.Code, so the
# edit still applies when the displayed code carries a "*" suffix.
v_rq1x$Study.Wave[rq1x %in% c("cens01pop98density", "pollution1998pca")] = "1st Contact"
# "zygos" starts with "z" only by coincidence, so the prefix lookup labels it
# "26 Year". It is shown as NA instead (as for sex1).
# NOTE(review): replace NA with the intended wave label if there is one.
v_rq1x$Study.Wave[rq1x %in% "zygos"] = NA_character_

# Clean uniform descriptions for participation indicator variables
rq1y_twin_descriptions = c(
  "Parent-report twin booklet data present",
  "Web test data present",
  "Self-report questionnaire data present",
  "Self-report behaviour booklet data present",
  "Self-report questionnaire data present",
  "TEDS21 phase 1 self-report questionnaire data present",
  "TEDS26 Mental Health Questionnaire data present",
  "CATSLife web test data present"
)
# The descriptions are hard-coded, so fail loudly if the variable list changes
# instead of letting data.frame() recycle or misalign them.
stopifnot(length(rq1y_twin_descriptions) == length(rq1y_twin1))

# B) Participation indicators. Study wave is derived from rq1y_twin1, the same
# vector used for every other column (previously rq1y_twin was used here).
v_rq1y = build_variable_table(rq1y_twin1, rq1y_twin_labels_clean, rq1y_twin_descriptions)

# C) RQ2 outcomes
v_rq2 = build_variable_table(rq2y, rq2y_labels_short, rq2y_labels)
v_rq2$Description = str_remove(v_rq2$Description, "\\(2.*")

# D) RQ3 outcomes
# NOTE(review): this table is built from the rq6y objects -- confirm that is
# the intended variable set for RQ3.
v_rq3 = build_variable_table(rq6y, rq6y_labels, clean_description(var_to_label(rq6y)))

# E) Built from the rq5y objects
v_rq5 = build_variable_table(rq5y, rq5y_labels_short, clean_description(var_to_label(rq5y)))

# Combine all v_rq dataframes with row indices for grouping
row_offsets = cumsum(c(0L, nrow(v_rq1x), nrow(v_rq1y), nrow(v_rq2), nrow(v_rq3)))
v_rq1x_indexed = add_row_index(v_rq1x, "RQ1 Variables", row_offsets[1])
v_rq1y_indexed = add_row_index(v_rq1y, "RQ1 Outcome Variables", row_offsets[2])
v_rq2_indexed = add_row_index(v_rq2, "RQ2 Variables", row_offsets[3])
v_rq3_indexed = add_row_index(v_rq3, "RQ3 Variables", row_offsets[4])
v_rq5_indexed = add_row_index(v_rq5, "RQ5 Variables", row_offsets[5])
v_rq_combined = rbind(v_rq1x_indexed, v_rq1y_indexed, v_rq2_indexed, v_rq3_indexed, v_rq5_indexed)
# Render the combined variable table with gt, one labelled row group per
# value of row_group.
gt(v_rq_combined) %>%
# Row groups are added in reverse (E first, A last); in the rendered table the
# group added last is displayed first, so the groups appear in A-E order.
# NOTE(review): the "E) RQ4 ..." label is attached to rows tagged
# "RQ5 Variables" -- confirm the intended RQ numbering.
tab_row_group(
label = "E) RQ4 Outcome Variables",
rows = row_group == "RQ5 Variables"
) %>%
tab_row_group(
label = "D) RQ3 Outcome Variables",
rows = row_group == "RQ3 Variables"
) %>%
tab_row_group(
label = "C) RQ2 Outcome Variables",
rows = row_group == "RQ2 Variables"
) %>%
tab_row_group(
label = "B) RQ1-2 Participation Indicators (1 = yes, 0 = no)",
rows = row_group == "RQ1 Outcome Variables"
) %>%
tab_row_group(
label = "A) Baseline variables used to predict later participation, and for IP weighting",
rows = row_group == "RQ1 Variables"
) %>%
# The helper columns are only needed for grouping, not for display.
cols_hide(c(row_group, row_id)) %>%
# Columns are referenced by their dotted data.frame names here ...
cols_width(
`Range.or.Level` ~ px(100)
) %>%
# ... and the dots are replaced by spaces in the displayed column labels.
cols_label_with(fn = ~ gsub("\\.", " ", .x)) %>%
# Same font and size for body cells, column labels and row-group labels.
tab_style(
style = cell_text(font = "Times New Roman", size = px(12)),
locations = cells_body()
) %>%
tab_style(
style = cell_text(font = "Times New Roman", size = px(12)),
locations = cells_column_labels()
) %>%
tab_style(
style = cell_text(font = "Times New Roman", size = px(12)),
locations = cells_row_groups()
) %>%
tab_footnote(
footnote = "Note. MFQ = Mood and Feelings Questionnaire; SDQ = Strengths and Difficulties Questionnaire; GCSE = General Certificate of Secondary Education; KS3 = Key Stage 3; GAD-D = Generalized Anxiety Disorder scale; PARCA = Parent Report of Children's Abilities; MZ = Monozygotic; DZ = Dizygotic; HNC = Higher National Certificate; HND = Higher National Diploma; CSE = Certificate of Secondary Education."
)| Short Label | Description | Teds Code | Range or Level | N | Study Wave |
|---|---|---|---|---|---|
| A) Baseline variables used to predict later participation, and for IP weighting | |||||
| Twin Sex | Twin sex, 0female 1male | sex1 | 0 — 1 | 26040 | NA |
| Mother age at birth | Age in years of natural mother at time of birth of twins | amumagetw | 14 — 45 | 25563 | 1st Contact |
| Father age at birth | Age in years of natural father at time of birth of twins | adadagetw | 16 — 66 | 22975 | 1st Contact |
| Single Parent | Single Parent | asingle | cohabiting biological mother and father / cohabiting biological parent with other*, single parent | 25587 | 1st Contact |
| Zygosity | Twin pair zygosity (best available estimate), 1MZ 2DZ | zygos | 1*, 2 | 26040 | 26 Year |
| Mother medical risk | Mother medical risk factor composite scale | amedtot | -2.49 — 4.98 | 25864 | 1st Contact |
| Father employment level | Father Standard Occupational Classification employment level | afasoc2* | 1*, 2, 3, 4, 5, 6, 7, 8, 9, caring for children at home, no job | 23111 | 1st Contact |
| Father education level | Male parent highest qualification level | afahqual | CSE grade 1 or O-level/GCSE grade A-C*, no qualifications, CSE grade 2-5 or O-level/GCSE grade D-G, A-level or S-level, HNC, HND, undergraduate degree, postgraduate qualification | 23049 | 1st Contact |
| Mother employment level | Mother Standard Occupational Classification employment level | amosoc2* | caring for children at home*, 1, 2, 3, 4, 5, 6, 7, 8, 9, no job | 25650 | 1st Contact |
| Mother education level | Female parent highest qualification level | amohqual | no qualifications*, CSE grade 2-5 or O-level/GCSE grade D-G, CSE grade 1 or O-level/GCSE grade A-C, A-level or S-level, HNC, HND, undergraduate degree, postgraduate qualification | 25629 | 1st Contact |
| Twin medical risk | Twin medical risk factor composite scale | atwmed1 | -1.88 — 3.33 | 25832 | 1st Contact |
| Ethnic origin | Ethnic origin of twins, original codes | aethnicc | White*, Asian, Black, Mixed race, Other | 25946 | 1st Contact |
| Language at home | Main language spoken at home | alang | other*, English, English + other | 25678 | 1st Contact |
| Older siblings | Number of older siblings | anoldsib | 0*, 1, 2, 3, 4, 5 or more | 26040 | 1st Contact |
| Younger siblings | Number of younger siblings | anyngsib | 0*, 1, 2 or more | 26040 | 1st Contact |
| Twins club member | Member of a Twins Club | atwclub | 0*, 1 | 25148 | 1st Contact |
| Childcare by others | Twins looked after by anyone else | alookels | 0*, 1 | 24791 | 1st Contact |
| Smoking in pregnancy | Smoked cigarettes while pregnant | asmoke | 0*, 1 | 25932 | 1st Contact |
| Alcohol in pregnancy | Drank alcohol while pregnant | adrink | 0*, 1 | 25783 | 1st Contact |
| Severe stress in pregnancy | Severe stress during pregnancy | astress | 0*, 1 | 25889 | 1st Contact |
| Pollution index | Principal Component of 1998 pollution variables | pollution1998pca | -2.47 — 4.57 | 23030 | 1st Contact |
| B) RQ1-2 Participation Indicators (1 = yes, 0 = no) | |||||
| Y4 (parent-report twin booklet) | Parent-report twin booklet data present | dtwdata1 | 0 — 1 | 26040 | 4 Year |
| Y12 (web tests) | Web test data present | lcwdata1 | 0 — 1 | 26040 | 12 Year |
| Y12 (questionnaire) | Self-report questionnaire data present | lcqdata1 | 0 — 1 | 26040 | 12 Year |
| Y16 (behaviour booklet) | Self-report behaviour booklet data present | pcbhdata1 | 0 — 1 | 26040 | 16 Year |
| Y18 (questionnaire) | Self-report questionnaire data present | rcqdata1 | 0 — 1 | 26040 | 18 Year |
| Y21 (TEDS21 phase-1 questionnaire) | TEDS21 phase 1 self-report questionnaire data present | u1cdata1 | 0 — 1 | 26040 | 21 Year |
| Y26 (TEDS26 questionnaire) | TEDS26 Mental Health Questionnaire data present | zmhdata1 | 0 — 1 | 26040 | 26 Year |
| Y26 (CATSLife web tests) | CATSLife web test data present | zcdata1 | 0 — 1 | 26040 | 26 Year |
| C) RQ2 Outcome Variables | |||||
| Maternal Education | Maternal Education (formatted as numeric variable) | amohqualn1 | 1 — 8 | 25629 | 1st Contact |
| Vocabulary | Vocabulary total score | bvocab1 | 0 — 100 | 11830 | 2 Year |
| Grammar | Grammar composite score | bgramma1 | 0 — 2 | 11783 | 2 Year |
| Parent-admin cognition | Parent-administered Parca mean score | badparn1 | -3.09 — 3.45 | 11762 | 2 Year |
| Parent-report cognition | Parent-reported Parca total score | breparc1 | 0 — 26 | 11872 | 2 Year |
| Conduct problems | Conduct SDQ-comparable Behar subscale | bsdqccont1 | 0 — 8 | 11815 | 2 Year |
| Emotional problems | Emotion SDQ-comparable Behar subscale | bsdqcemot1 | 0 — 4 | 11823 | 2 Year |
| Hyperactivity | Hyperactivity SDQ-comparable Behar subscale | bsdqchypt1 | 0 — 6 | 11836 | 2 Year |
| Peer problems | Peer SDQ-comparable Behar subscale | bsdqcpert1 | 0 — 6 | 11565 | 2 Year |
| Prosocial behavior | Prosocial SDQ-comparable Behar subscale | bsdqcprot1 | 0 — 10 | 11729 | 2 Year |
| D) RQ3 Outcome Variables | |||||
| Y12: Depression (MFQ) | MFQ scale from 11 MFQ items (child self-report) at 12 | lcmfqt1 | 0 — 22 | 11432 | 12 Year |
| Y12: Externalising | SDQ Externalising scale at 12 | lsdqext1 | 0 — 20 | 11389 | 12 Year |
| Y12: Cognitive ability | G composite scale from child web tests at 12, standardised | lcg1 | -3.67 — 3.04 | 8458 | 12 Year |
| Y16: GCSE core subjects grade | Core subjects (English, maths, science): mean grade in GCSE results (twin exams at 16) | pcexgcsecoregrdm1 | 4 — 11 | 12982 | 16 Year |
| E) RQ4 Outcome Variables | |||||
| Y12: Cognitive ability | G composite scale from child web tests at 12, standardised | lcg1 | -3.67 — 3.04 | 8458 | 12 Year |
| Y14: Cognitive ability | G composite scale from child web tests at 14, standardised | ncg1 | -4.12 — 3.17 | 5341 | 14 Year |
| Y16: Cognitive ability | G composite scale from child web tests at 16, standardised | pcg1 | -2.86 — 4.06 | 4767 | 16 Year |
| Y21: G-game total score | G-game (general cognitive ability) overall total score | ucgt1 | 3 — 40 | 4549 | 21 Year |
| Y14: KS3 academic achievement | End of KS3 all-subject Academic achievement mean level (from parent questionnaire) | npks3tall1 | 1 — 9 | 5436 | 14 Year |
| Y16: GCSE core subjects grade | Core subjects (English, maths, science): mean grade in GCSE results (twin exams at 16) | pcexgcsecoregrdm1 | 4 — 11 | 12982 | 16 Year |
| Y21: Highest qualification | Twin probable highest level of qualification including current study (TEDS21 phase 1 twin questionnaire), 1-11 | u1chqualp1 | 1 — 11 | 8912 | 21 Year |
| Y26: Highest qualification | Demographics item: highest qualification ordinal level (TEDS26 twin Mental Health Questionnaire) | zmhhqual1 | 1 — 11 | 8243 | 26 Year |
| Y12: Depression (MFQ) | MFQ scale from 11 MFQ items (child self-report) at 12 | lcmfqt1 | 0 — 22 | 11432 | 12 Year |
| Y16: Depression (MFQ) | MFQ total scale (child behaviour questionnaire at 16) | pcbhmfqt1 | 0 — 26 | 9906 | 16 Year |
| Y21: Depression (MFQ) | MFQ overall total score (TEDS21 phase 1 twin questionnaire) | u1cmfqt1 | 0 — 16 | 9204 | 21 Year |
| Y26: Depression (MFQ) | MFQ overall total score (TEDS26 twin Mental Health Questionnaire) | zmhmfqt1 | 0 — 26 | 8306 | 26 Year |
| Y21: Anxiety (GAD-D) | General Anxiety overall total score (TEDS21 phase 2 twin questionnaire) | u2cganxt1 | 0 — 40 | 8236 | 21 Year |
| Y26: Anxiety (GAD-D) | GAD-D (General Anxiety) overall total score (TEDS26 twin Mental Health Questionnaire) | zmhganxt1 | 0 — 40 | 8022 | 26 Year |
| Y12: Externalising | SDQ Externalising scale at 12 | lsdqext1 | 0 — 20 | 11389 | 12 Year |
| Y16: Externalising | SDQ Externalising scale at 16 | psdqext1 | 0 — 20 | 9889 | 16 Year |
| Y21: Externalising | SDQ Externalising scale at 21 | usdqext1 | 0 — 19 | 9210 | 21 Year |
| Y26: Externalising | SDQ Externalising scale at 26 | zsdqext1 | 0 — 19 | 7718 | 26 Year |
| Note. MFQ = Mood and Feelings Questionnaire; SDQ = Strengths and Difficulties Questionnaire; GCSE = General Certificate of Secondary Education; KS3 = Key Stage 3; GAD-D = Generalized Anxiety Disorder scale; PARCA = Parent Report of Children's Abilities; MZ = Monozygotic; DZ = Dizygotic; HNC = Higher National Certificate; HND = Higher National Diploma; CSE = Certificate of Secondary Education. | |||||
# Keep the columns whose name contains "age", minus those containing
# "genpro", "mumage" or "dadage" and those ending in "2".
df_age = select(
  df,
  contains("age"),
  -contains("genpro"),
  -contains("mumage"),
  -contains("dadage"),
  -ends_with("2")
)
# Long format: one row per value, with `name` holding the label that
# var_to_label() returns for the originating column.
df_age_long = pivot_longer(
  setNames(df_age, var_to_label(colnames(df_age))),
  cols = everything()
)
# Histogram of the age values, one facet per age variable.
df_age_long %>%
# Fix the factor levels to the column order of df_age so the facets follow
# that order rather than alphabetical order of the labels.
mutate(name = factor(name, levels = var_to_label(colnames(df_age)))) %>%
ggplot(aes(x=value)) +
geom_histogram(bins = 100) +
facet_wrap(~name, ncol =3) +
# Axis breaks every 2 units from 0 to 30.
scale_x_continuous(breaks = seq(0,30,by=2)) +
theme_bw() +
# Hide the horizontal grid lines.
theme(
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank()
)Warning: Removed 202363 rows containing non-finite outside the scale range
(`stat_bin()`).
# Save the figure; save_plot() is defined outside this section.
save_plot("11_participant_ages_each_timepoint", width = 12, height = 12)Warning: Removed 202363 rows containing non-finite outside the scale range
(`stat_bin()`).
Removed 202363 rows containing non-finite outside the scale range
(`stat_bin()`).
[1] TRUE
# Frequency table of maternal education (amohqual), using twin == 1 rows only.
df %>%
filter(twin == 1) %>%
count(amohqual) %>%
mutate(
# Numeric code of each level, as returned by as.numeric() on the column.
numeric_code = as.numeric(amohqual),
# Share of all counted rows; the denominator includes the NA row.
percent = n / sum(n)
) %>%
select(numeric_code, amohqual, n, percent) %>%
gt() %>%
cols_label(
numeric_code = "Code",
amohqual = "Education Level",
n = "N",
percent = "Percent"
) %>%
fmt_percent(columns = percent)| Code | Education Level | N | Percent |
|---|---|---|---|
| 1 | no qualifications | 1208 | 9.28% |
| 2 | CSE grade 2-5 or O-level/GCSE grade D-G | 2000 | 15.36% |
| 3 | CSE grade 1 or O-level/GCSE grade A-C | 4779 | 36.71% |
| 4 | A-level or S-level | 1706 | 13.10% |
| 5 | HNC | 359 | 2.76% |
| 6 | HND | 486 | 3.73% |
| 7 | undergraduate degree | 1495 | 11.48% |
| 8 | postgraduate qualification | 782 | 6.01% |
| NA | NA | 205 | 1.57% |
In some analyses, maternal education is treated as a numeric variable (1-8), which assumes a linear relationship with child outcomes. To check whether this assumption is reasonable, we examine the predicted child education & cognition outcomes at each maternal education level.
We regress child outcomes on maternal education using dummy coding (i.e., treating each education level as a separate category). The plots below show the predicted outcome for each education level. If the linearity assumption holds, these points should fall approximately on a straight line.
We find that children of mothers with A-levels have broadly similar predicted outcomes to children of mothers with Higher National Certificate (HNC) or Higher National Diploma (HND) qualifications, even though these levels are coded 4, 5 and 6. Thus, the assumption of linearity seems unreasonable, and it might be problematic to treat these three categories as separate, equally spaced numeric codes.
# Helper function to create marginal effects plot
#
# model:   fitted model passed to marginaleffects::predictions(); predictions
#          are aggregated by the "amohqual" variable.
# y_label: title for the y axis.
# Returns a ggplot object showing the prediction per education level as a
# point with an error bar (conf.low to conf.high), joined by a line.
plot_education_effects = function(model, y_label) {
  predicted = data.frame(marginaleffects::predictions(model, by = "amohqual"))
  predicted = rename(
    predicted,
    prediction = estimate,
    education_level = amohqual
  )

  error_bars = geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.2)
  # Rotate the level names so the long labels do not overlap.
  rotated_x_text = theme(
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)
  )

  # group = 1 lets geom_line() connect the points across the discrete x axis.
  ggplot(predicted, aes(y = prediction, x = education_level, group = 1)) +
    error_bars +
    geom_point(size = 2) +
    geom_line() +
    labs(x = "Maternal Education Level", y = y_label) +
    theme_bw() +
    rotated_x_text
}
Call:
lm(formula = npks3tall1 ~ amohqual, data = .)
Residuals:
Min 1Q Median 3Q Max
-4.8335 -0.4234 0.0387 0.4676 3.4857
Coefficients:
Estimate Std. Error t value
(Intercept) 5.23404 0.06752 77.516
amohqualCSE grade 2-5 or O-level/GCSE grade D-G 0.28023 0.08062 3.476
amohqualCSE grade 1 or O-level/GCSE grade A-C 0.53886 0.07145 7.541
amohqualA-level or S-level 0.78125 0.07623 10.248
amohqualHNC 0.66686 0.10415 6.403
amohqualHND 0.66195 0.09346 7.082
amohqualundergraduate degree 0.97547 0.07648 12.754
amohqualpostgraduate qualification 1.01003 0.08401 12.023
Pr(>|t|)
(Intercept) < 2e-16 ***
amohqualCSE grade 2-5 or O-level/GCSE grade D-G 0.000517 ***
amohqualCSE grade 1 or O-level/GCSE grade A-C 6.32e-14 ***
amohqualA-level or S-level < 2e-16 ***
amohqualHNC 1.79e-10 ***
amohqualHND 1.80e-12 ***
amohqualundergraduate degree < 2e-16 ***
amohqualpostgraduate qualification < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.7397 on 2693 degrees of freedom
(10114 observations deleted due to missingness)
Multiple R-squared: 0.1102, Adjusted R-squared: 0.1078
F-statistic: 47.63 on 7 and 2693 DF, p-value: < 2.2e-16
# Plot the predicted KS3 score per maternal education level, then save it.
plot_education_effects(model_ks3, "Predicted KS3 Score")
save_plot("11_meducation_ks3_plot", width = 6, height = 5)
Call:
lm(formula = pcexgcsecoregrdm1 ~ amohqual, data = .)
Residuals:
Min 1Q Median 3Q Max
-4.8174 -0.6731 0.0569 0.8087 3.0915
Coefficients:
Estimate Std. Error t value
(Intercept) 7.90847 0.06435 122.894
amohqualCSE grade 2-5 or O-level/GCSE grade D-G 0.28280 0.07571 3.735
amohqualCSE grade 1 or O-level/GCSE grade A-C 0.76463 0.06818 11.214
amohqualA-level or S-level 1.20897 0.07350 16.449
amohqualHNC 1.13466 0.10065 11.274
amohqualHND 1.21039 0.09213 13.137
amohqualundergraduate degree 1.73733 0.07321 23.729
amohqualpostgraduate qualification 1.79211 0.08073 22.198
Pr(>|t|)
(Intercept) < 2e-16 ***
amohqualCSE grade 2-5 or O-level/GCSE grade D-G 0.000189 ***
amohqualCSE grade 1 or O-level/GCSE grade A-C < 2e-16 ***
amohqualA-level or S-level < 2e-16 ***
amohqualHNC < 2e-16 ***
amohqualHND < 2e-16 ***
amohqualundergraduate degree < 2e-16 ***
amohqualpostgraduate qualification < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.105 on 6431 degrees of freedom
(6376 observations deleted due to missingness)
Multiple R-squared: 0.1821, Adjusted R-squared: 0.1812
F-statistic: 204.5 on 7 and 6431 DF, p-value: < 2.2e-16
# Plot the predicted GCSE score per maternal education level, then save it.
plot_education_effects(model_gcse, "Predicted GCSE Score")
save_plot("11_meducation_gcse_plot", width = 6, height = 5)
Call:
lm(formula = lcg1 ~ amohqual, data = .)
Residuals:
Min 1Q Median 3Q Max
-3.8039 -0.6214 0.0563 0.6778 2.6524
Coefficients:
Estimate Std. Error t value
(Intercept) -0.54041 0.06525 -8.282
amohqualCSE grade 2-5 or O-level/GCSE grade D-G 0.23722 0.07851 3.022
amohqualCSE grade 1 or O-level/GCSE grade A-C 0.43448 0.06979 6.225
amohqualA-level or S-level 0.67126 0.07566 8.872
amohqualHNC 0.58609 0.10728 5.463
amohqualHND 0.65015 0.09575 6.790
amohqualundergraduate degree 0.92674 0.07491 12.372
amohqualpostgraduate qualification 0.93773 0.08317 11.275
Pr(>|t|)
(Intercept) < 2e-16 ***
amohqualCSE grade 2-5 or O-level/GCSE grade D-G 0.00253 **
amohqualCSE grade 1 or O-level/GCSE grade A-C 5.28e-10 ***
amohqualA-level or S-level < 2e-16 ***
amohqualHNC 4.95e-08 ***
amohqualHND 1.28e-11 ***
amohqualundergraduate degree < 2e-16 ***
amohqualpostgraduate qualification < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.9634 on 4195 degrees of freedom
(8612 observations deleted due to missingness)
Multiple R-squared: 0.07031, Adjusted R-squared: 0.06876
F-statistic: 45.32 on 7 and 4195 DF, p-value: < 2.2e-16
# Plot the predicted cognitive score per maternal education level, then save it.
plot_education_effects(model_cog, "Predicted Cognitive Score (g)")
save_plot("11_meducation_cognitive_plot", width = 6, height = 5)